# title: "integration_v2"
## author: "Jingwei Song"
## date: "06/30/2023"
use aggr_fltd_v2 (no sperm)
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
#library(Seurat, lib.loc = "/usr/local/lib/R/site-library")
setwd("~/blue_jsong1/Seurat_Jan_2023/Cell ranger v7 integrate ALL")
# load ACME seurat
#load("~/blue_jsong1/Seurat_Jan_2023/Cell ranger v7 integrate ALL/data/ACME_Filtered_seurat_PC50_dim15_res.0.5.RData")
load("data/ACME_Filtered_seurat_PC50_dim15_res.0.5.RData")
ACME <- filtered_seurat
rm(filtered_seurat)
# load Justin's seurat
#load("~/blue_jsong1/Seurat_Jan_2023/Cell ranger v7 integrate ALL/data/aggr_fltd_PC50_dim15_res.0.5_v2.RData")
load("data/aggr_fltd_PC50_dim15_res.0.5_v2.RData")
# load BR4/BR5 merged
#load("~/blue_jsong1/Seurat_Jan_2023/Cell ranger v7 integrate ALL/data/filtered_seurat_PC50_dims15_res0.5_wo_artefact.RData")
load("data/filtered_seurat_PC50_dims15_res0.5_wo_artefact.RData")
BR_new <- no_C2
rm(no_C2)
# all SCT as active assay
ACME
## An object of class Seurat
## 38857 features across 17641 samples within 2 assays
## Active assay: SCT (16823 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, umap
aggr_fltd
## An object of class Seurat
## 29865 features across 4023 samples within 2 assays
## Active assay: SCT (14797 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, umap
BR_new
## An object of class Seurat
## 39206 features across 26366 samples within 2 assays
## Active assay: SCT (17172 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, umap
#DefaultAssay(filtered_seurat) <- "SCT"
split_seurat <- list(aggr_fltd, ACME, BR_new)
# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list = split_seurat,
nfeatures = 3000)
# Prepare the SCT list object for integration
split_seurat <- PrepSCTIntegration(object.list = split_seurat,
anchor.features = integ_features)
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat,
normalization.method = "SCT",
anchor.features = integ_features)
seurat_integrated <- IntegrateData(anchorset = integ_anchors,
normalization.method = "SCT")
## Warning: Attempting to merge an SCTAssay with another Assay type
## Converting all to standard Assay objects.
## Warning: Attempting to merge an SCTAssay with another Assay type
## Converting all to standard Assay objects.
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.1 GiB
seurat_integrated # 53525 cells
## An object of class Seurat
## 42657 features across 48030 samples within 3 assays
## Active assay: integrated (3000 features, 3000 variable features)
## 2 other assays present: RNA, SCT
# Run PCA
seurat_integrated <- RunPCA(seurat_integrated, npcs = 50)
# Plot PCA
PCAPlot(seurat_integrated,
split.by = "sample")
# Run UMAP
seurat_integrated <- RunUMAP(seurat_integrated,
dims = 1:20,
reduction = "pca")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
seurat_integrated <- FindNeighbors(seurat_integrated, reduction = "pca", dims = 1:20)
seurat_integrated <- FindClusters(seurat_integrated, resolution = c(0.3,0.5))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 48030
## Number of edges: 1761532
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9641
## Number of communities: 20
## Elapsed time: 10 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 48030
## Number of edges: 1761532
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9505
## Number of communities: 22
## Elapsed time: 10 seconds
DimPlot(seurat_integrated,label = T) + NoLegend() + NoAxes()
DimPlot(seurat_integrated,label = T, split.by = "sample") + NoLegend() + NoAxes()
colnames(seurat_integrated@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "seq_folder" "nUMI" "nGene"
## [7] "log10GenesPerUMI" "mitoRatio" "cells"
## [10] "sample" "nCount_SCT" "nFeature_SCT"
## [13] "SCT_snn_res.0.5" "seurat_clusters" "integrated_snn_res.0.3"
## [16] "integrated_snn_res.0.5"
Idents(seurat_integrated) <- "integrated_snn_res.0.3"
DimPlot(seurat_integrated,label = T) + NoLegend() + NoAxes()
# methanol_new has some intermediary cells which could delete
DefaultAssay(seurat_integrated) <- "SCT"
FeaturePlot(seurat_integrated, features = c("HyS0018.100", "HyS0061.60", #HMG, PCNA
"HyS0008.263", "HyS0030.203", #Ncol1, Nematocilin
"HyS0041.99", "HyS0004.446", #chitinase,mucs
"HyS0085.53", "HyS0013.338", #ELAV, Rfamide
"HyS0017.253", "HyS0010.323",#Wnt5, Pitx
"HyS0001.667", "HyS0011.54"),
order = T, min.cutoff = 'q5', max.cutoff = 'q95')
## Warning in FeaturePlot(seurat_integrated, features = c("HyS0018.100",
## "HyS0061.60", : All cells have the same value (0.693147180559945) of
## HyS0001.667.
FeaturePlot(seurat_integrated, features = c("HyS0041.99", "HyS0004.446")) #gland cell
FeaturePlot(seurat_integrated, features = c("HyS0085.53", "HyS0013.338")) #neuron
FeaturePlot(seurat_integrated, features = c("HyS0053.57")) # NC unknown
# save(seurat_integrated, file="output/Seurat_integrated_features3k_PC50_dims20_res0.5_v2.RData")
# load
DimPlot(seurat_integrated,label = T) + NoLegend() + NoAxes()+ DarkTheme()
seurat_integrated@meta.data %>%
ggplot(aes(x=sample, fill=sample)) +
geom_bar() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells")
seurat_integrated@meta.data %>%
ggplot(aes(color=sample, x=nGene, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 300)
seurat_integrated@meta.data %>%
ggplot(aes(color=sample, x=nUMI, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 300)
seurat_integrated@meta.data %>%
ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
geom_vline(xintercept = 0.8)
seurat_integrated@meta.data %>%
ggplot(aes(color=sample, x=mitoRatio, fill=sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
geom_vline(xintercept = 0.2)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 28313 rows containing non-finite values (`stat_density()`).
seurat_integrated@meta.data %>%
ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~sample)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# # nb/cnidocytes
# #HyS0030.203 Nematocilin
# #HyS0042.80 ARSTnd2-like
# #HyS0008.263 Ncol1
# #HyS0004.359 Ncol3
#
# FeaturePlot(seurat_integrated, features = c("HyS0008.263", "HyS0004.359","HyS0042.80", "HyS0030.203" ), order = T, min.cutoff = 'q10')
#
#
# # stem cell
# # HyS0050.7 PIWI
# #HyS0049.34 PIWI-like
# #HyS0061.60 PCNA
# #HyS0098.34 PTER
# FeaturePlot(seurat_integrated, features = c("HyS0050.7", "HyS0049.34", "HyS0061.60", "HyS0098.34"), order = T, min.cutoff = 'q10')
# # only PTER is in the integrated assay,
#
# # Raw reads
# DefaultAssay(seurat_integrated) <- "RNA"
# FeaturePlot(seurat_integrated, features = c("HyS0050.7", "HyS0049.34", "HyS0061.60", "HyS0098.34"),
# slot = "counts", order = T, min.cutoff = 'q10')
#
# seurat_integrated <- NormalizeData(seurat_integrated, normalization.method = "LogNormalize")
#
# # how many cells with piwi expression (>=1?)
# counts <- GetAssayData(seurat_integrated, slot = "counts")
# counts[1:5, 1:5]
#
# piwi_counts <- counts["HyS0050.7",]
# summary(piwi_counts)
# nonzero <- piwi_counts > 0
# sum(nonzero) # 3670 cells with nonzero piwi expression
# head(nonzero)
#
# table(piwi_counts)
#
# HMG_counts <- counts["HyS0018.100",]
# plot(table((HMG_counts)))
# sessioninfo
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RCurl_1.98-1.12 cowplot_1.1.1
## [3] scales_1.2.1 Matrix_1.5-4.1
## [5] lubridate_1.9.2 forcats_1.0.0
## [7] stringr_1.5.0 dplyr_1.1.2
## [9] purrr_1.0.1 readr_2.1.4
## [11] tidyr_1.3.0 tibble_3.2.1
## [13] ggplot2_3.4.2 tidyverse_2.0.0
## [15] SeuratObject_4.1.3 Seurat_4.3.0
## [17] SingleCellExperiment_1.20.1 SummarizedExperiment_1.28.0
## [19] Biobase_2.58.0 GenomicRanges_1.50.2
## [21] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [23] S4Vectors_0.36.2 BiocGenerics_0.44.0
## [25] MatrixGenerics_1.10.0 matrixStats_0.63.0
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.8 igraph_1.4.3 lazyeval_0.2.2
## [4] sp_1.6-0 splines_4.2.3 listenv_0.9.0
## [7] scattermore_1.1 digest_0.6.31 htmltools_0.5.5
## [10] fansi_1.0.4 magrittr_2.0.3 tensor_1.5
## [13] cluster_2.1.4 ROCR_1.0-11 tzdb_0.4.0
## [16] globals_0.16.2 timechange_0.2.0 spatstat.sparse_3.0-1
## [19] colorspace_2.1-0 ggrepel_0.9.3 xfun_0.39
## [22] jsonlite_1.8.4 progressr_0.13.0 spatstat.data_3.0-1
## [25] survival_3.5-5 zoo_1.8-12 glue_1.6.2
## [28] polyclip_1.10-4 gtable_0.3.3 zlibbioc_1.44.0
## [31] XVector_0.38.0 leiden_0.4.3 DelayedArray_0.24.0
## [34] future.apply_1.11.0 abind_1.4-5 DBI_1.1.3
## [37] spatstat.random_3.1-5 miniUI_0.1.1.1 Rcpp_1.0.10
## [40] viridisLite_0.4.2 xtable_1.8-4 reticulate_1.28
## [43] htmlwidgets_1.6.2 httr_1.4.6 RColorBrewer_1.1-3
## [46] ellipsis_0.3.2 ica_1.0-3 farver_2.1.1
## [49] pkgconfig_2.0.3 sass_0.4.6 uwot_0.1.14
## [52] deldir_1.0-9 utf8_1.2.3 labeling_0.4.2
## [55] tidyselect_1.2.0 rlang_1.1.1 reshape2_1.4.4
## [58] later_1.3.1 munsell_0.5.0 tools_4.2.3
## [61] cachem_1.0.8 cli_3.6.1 generics_0.1.3
## [64] ggridges_0.5.4 evaluate_0.21 fastmap_1.1.1
## [67] yaml_2.3.7 goftest_1.2-3 knitr_1.43
## [70] fitdistrplus_1.1-11 RANN_2.6.1 pbapply_1.7-0
## [73] future_1.32.0 nlme_3.1-162 mime_0.12
## [76] compiler_4.2.3 rstudioapi_0.14 plotly_4.10.1
## [79] png_0.1-8 spatstat.utils_3.0-3 bslib_0.4.2
## [82] stringi_1.7.12 highr_0.10 lattice_0.21-8
## [85] vctrs_0.6.2 pillar_1.9.0 lifecycle_1.0.3
## [88] spatstat.geom_3.2-1 lmtest_0.9-40 jquerylib_0.1.4
## [91] RcppAnnoy_0.0.20 data.table_1.14.8 bitops_1.0-7
## [94] irlba_2.3.5.1 httpuv_1.6.11 patchwork_1.1.2
## [97] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-21
## [100] gridExtra_2.3 parallelly_1.36.0 codetools_0.2-19
## [103] MASS_7.3-60 withr_2.5.0 sctransform_0.3.5
## [106] GenomeInfoDbData_1.2.9 mgcv_1.8-42 parallel_4.2.3
## [109] hms_1.1.3 grid_4.2.3 rmarkdown_2.21
## [112] Rtsne_0.16 spatstat.explore_3.2-1 shiny_1.7.4